library(readr)
library(chromVAR)
library(Matrix)
library(BSgenome.Mmusculus.UCSC.mm10)
library(GenomicRanges)
library(plotly)
library(heatmaply)
if (basename(getwd()) != "code") setwd("code")
# For laptop
BiocParallel::register(BiocParallel::MulticoreParam(2, progressbar = FALSE))
Analysis of ImmeGen Consortium ATAC data using chromVAR
# Performed by Caleb Lareau, 24 September
csv <- "../data/160923_Immgen.csv.zip"
counts <- Matrix(data.matrix(read_csv(csv)[,-1]))
peakdf <- read.table("../data/120923_Immgen.bed")
names(peakdf) <- c("chr", "start", "end", "name")
peaks <- GRanges(peakdf)
peaks <- get_gc(peaks, genome = BSgenome.Mmusculus.UCSC.mm10)
praw <- peaks
peaks <- sort(peaks)
counts <- counts[match(peaks,praw),]
namesdf <- read.table("../data/libnames.txt", header = TRUE)
namesvec <- as.character(namesdf[,2])
names(namesvec) <- namesdf[,1]
colnames(counts) <- unname(namesvec[colnames(counts)])
low_bias_samples <- bias_filtering(counts, peaks)
colnames(counts)[!colnames(counts) %in% names(low_bias_samples)]
## [1] "MC.heparinase.PC_3" "MC.heparinase.PC_4"
counts <- counts[, low_bias_samples]
peaks_to_keep <- filter_peaks(counts, peaks, non_overlapping = TRUE)
counts <- counts[peaks_to_keep,]
peaks <- peaks[peaks_to_keep]
bg <- get_background_peaks(counts_mat = counts, bias = peaks)
# chromVAR motifs
#motifs <- get_motifs(species = "Mus musculus")
# load our own
load("../data/cisbp_mm9_unique.08_Jun_2016.RData")
#motif_ix <- match_pwms(pwms, peaks, genome = BSgenome.Mmusculus.UCSC.mm10)
#deviations <- compute_deviations(counts_mat = counts, background_peaks = bg, peak_indices = motif_ix)
#saveRDS(deviations, file = "../output/deviations.rds")
deviations <- readRDS("../output/deviations.rds")
variability <- compute_variability(deviations$z)
labels <- TFBSTools::name(pwms[rownames(variability)])